{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "2d124d22-de73-436b-86cd-9b162b469be8",
   "metadata": {
    "id": "2d124d22-de73-436b-86cd-9b162b469be8"
   },
   "outputs": [],
   "source": [
    "%pip install langchain_community\n",
    "%pip install langchain_experimental\n",
    "%pip install langchain-openai\n",
    "%pip install langchainhub\n",
    "%pip install chromadb\n",
    "%pip install langchain\n",
    "%pip install python-dotenv\n",
    "%pip install PyPDF2 -q --user\n",
    "%pip install rank_bm25\n",
    "\n",
    "# New installations for supporting new retrievers\n",
    "%pip install langchain_core\n",
    "%pip install --upgrade --quiet wikipedia"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "f884314f-870c-4bfb-b6c1-a5b4801ec172",
   "metadata": {
    "executionInfo": {
     "elapsed": 7390,
     "status": "ok",
     "timestamp": 1715455737041,
     "user": {
      "displayName": "",
      "userId": ""
     },
     "user_tz": 240
    },
    "id": "f884314f-870c-4bfb-b6c1-a5b4801ec172"
   },
   "outputs": [],
   "source": [
    "import os\n",
    "import openai\n",
    "from langchain_openai import ChatOpenAI, OpenAIEmbeddings\n",
    "from langchain import hub\n",
    "from langchain_core.output_parsers import StrOutputParser\n",
    "from langchain_core.runnables import RunnablePassthrough\n",
    "import chromadb\n",
    "from langchain_community.vectorstores import Chroma\n",
    "from langchain_core.runnables import RunnableParallel\n",
    "from dotenv import load_dotenv, find_dotenv\n",
    "from langchain_core.prompts import PromptTemplate\n",
    "from PyPDF2 import PdfReader\n",
    "from langchain.text_splitter import RecursiveCharacterTextSplitter\n",
    "from langchain_core.documents.base import Document\n",
    "from langchain_community.retrievers import BM25Retriever\n",
    "from langchain.retrievers import EnsembleRetriever"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "eba3468a-d7c2-4a79-8df2-c335542950f2",
   "metadata": {
    "executionInfo": {
     "elapsed": 5,
     "status": "ok",
     "timestamp": 1715455737042,
     "user": {
      "displayName": "",
      "userId": ""
     },
     "user_tz": 240
    },
    "id": "eba3468a-d7c2-4a79-8df2-c335542950f2"
   },
   "outputs": [],
   "source": [
    "# variables\n",
    "_ = load_dotenv(dotenv_path='env.txt')\n",
    "os.environ['OPENAI_API_KEY'] = os.getenv('OPENAI_API_KEY')\n",
    "openai.api_key = os.environ['OPENAI_API_KEY']\n",
    "embedding_function = OpenAIEmbeddings()\n",
    "llm = ChatOpenAI(model_name=\"gpt-4o-mini\")\n",
    "pdf_path = \"google-2023-environmental-report.pdf\"\n",
    "collection_name = \"google_environmental_report\"\n",
    "str_output_parser = StrOutputParser()\n",
    "user_query = \"What are Google's environmental initiatives?\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "d3ad428a-3eb6-40ec-a1a5-62565ead1e5b",
   "metadata": {
    "id": "d3ad428a-3eb6-40ec-a1a5-62565ead1e5b"
   },
   "outputs": [],
   "source": [
    "#### INDEXING ####"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "98ccda2c-0f4c-41c5-804d-2227cdf35aa7",
   "metadata": {
    "executionInfo": {
     "elapsed": 19512,
     "status": "ok",
     "timestamp": 1715455756551,
     "user": {
      "displayName": "",
      "userId": ""
     },
     "user_tz": 240
    },
    "id": "98ccda2c-0f4c-41c5-804d-2227cdf35aa7"
   },
   "outputs": [],
   "source": [
    "# Load the PDF and extract text\n",
    "pdf_reader = PdfReader(pdf_path)\n",
    "text = \"\"\n",
    "for page in pdf_reader.pages:\n",
    "    text += page.extract_text()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "927a4c65-aa05-486c-8295-2f99673e7c20",
   "metadata": {
    "executionInfo": {
     "elapsed": 12,
     "status": "ok",
     "timestamp": 1715455756552,
     "user": {
      "displayName": "",
      "userId": ""
     },
     "user_tz": 240
    },
    "id": "927a4c65-aa05-486c-8295-2f99673e7c20"
   },
   "outputs": [],
   "source": [
    "# Split\n",
    "character_splitter = RecursiveCharacterTextSplitter(\n",
    "    separators=[\"\\n\\n\", \"\\n\", \". \", \" \", \"\"],\n",
    "    chunk_size=1000,\n",
    "    chunk_overlap=200\n",
    ")\n",
    "splits = character_splitter.split_text(text)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "13716677-2fa1-4963-a885-9841e3258ef1",
   "metadata": {},
   "outputs": [],
   "source": [
    "dense_documents = [Document(page_content=text, metadata={\"id\": str(i), \"source\": \"dense\"}) for i, text in enumerate(splits)]\n",
    "sparse_documents = [Document(page_content=text, metadata={\"id\": str(i), \"source\": \"sparse\"}) for i, text in enumerate(splits)]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "6b13568c-d633-464d-8c43-0d55f34cc8c1",
   "metadata": {
    "executionInfo": {
     "elapsed": 9471,
     "status": "ok",
     "timestamp": 1715455766015,
     "user": {
      "displayName": "",
      "userId": ""
     },
     "user_tz": 240
    },
    "id": "6b13568c-d633-464d-8c43-0d55f34cc8c1"
   },
   "outputs": [],
   "source": [
    "# Chroma Vector Store\n",
    "chroma_client = chromadb.Client()\n",
    "dense_documents = [Document(page_content=text, metadata={\"id\": str(i), \"source\": \"dense\"}) for i, text in enumerate(splits)]\n",
    "sparse_documents = [Document(page_content=text, metadata={\"id\": str(i), \"source\": \"sparse\"}) for i, text in enumerate(splits)]\n",
    "vectorstore = Chroma.from_documents(\n",
    "    documents=dense_documents,\n",
    "    embedding=embedding_function,\n",
    "    collection_name=collection_name,\n",
    "    client=chroma_client\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "id": "749fcce7-3203-49e8-a62f-ecf2edce4570",
   "metadata": {},
   "outputs": [],
   "source": [
    "dense_retriever = vectorstore.as_retriever(search_kwargs={\"k\": 10})\n",
    "sparse_retriever = BM25Retriever.from_documents(sparse_documents, k=10)\n",
    "ensemble_retriever = EnsembleRetriever(retrievers=[dense_retriever, sparse_retriever], weights=[0.5, 0.5], c=0, k=10)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "id": "1b9bba86-96c1-42fb-a9a2-c20b15528c7f",
   "metadata": {},
   "outputs": [],
   "source": [
    "# adding similarity score threshold\n",
    "dense_retriever = vectorstore.as_retriever(\n",
    "    search_type=\"similarity_score_threshold\", \n",
    "    search_kwargs={\"score_threshold\": 0.5}\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "id": "efb68a11-ff24-4166-b62c-ff6ec9b34724",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Searching with Maximum marginal relevance (MMR)\n",
    "dense_retriever = vectorstore.as_retriever(\n",
    "    search_type=\"mmr\"\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "fd8c0b80-02d1-4e5b-a286-4882150eab1c",
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/opt/conda/lib/python3.11/site-packages/langchain_core/_api/deprecation.py:119: LangChainDeprecationWarning: The method `BaseRetriever.get_relevant_documents` was deprecated in langchain-core 0.1.46 and will be removed in 0.3.0. Use invoke instead.\n",
      "  warn_deprecated(\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "First document returned:\n",
      "\n",
      "Title: Golden Age of Piracy\n",
      "\n",
      "Summary: The Golden Age of Piracy is a common designation for the period between the 1650s and the 1730s, when maritime piracy was a significant factor in the histories of the North Atlantic and Indian Oceans.\n",
      "Histories of piracy often subdivide the Golden Age of Piracy into three periods:\n",
      "\n",
      "The buccaneering period (approximately 1650 to 1680), characterized by Anglo-French seamen based in Jamaica and Tortuga attacking Spanish colonies, and shipping in the Caribbean and eastern Pacific to western Pacific.\n",
      "The Pirate Round (1690s), associated with long-distance voyages from the Americas to rob  East India Company targets in the Indian Ocean and Red Sea.\n",
      "The post-Spanish Succession period (1715 to 1726), when English sailors and privateers left unemployed by the end of the War of the Spanish Succession turned en masse to piracy in the Caribbean, the Indian Ocean, the North American eastern seaboard, and the West African coast.\n",
      "Narrower definitions of the Golden Age sometimes exclude the first or second periods, but most include at least some portion of the third. The modern conception of pirates as depicted in popular culture is derived largely, although not always accurately, from the Golden Age of Piracy.\n",
      "Factors contributing to piracy during the Golden Age included the rise in quantities of valuable cargoes being shipped to Europe over vast ocean areas, reduced European navies in certain regions, the training and experience that many sailors had gained in European navies (particularly the British Royal Navy), and corrupt and ineffective government in European overseas colonies. Colonial powers at the time constantly fought with pirates and engaged in several notable battles and other related events.\n",
      "\n",
      "\n",
      "\n",
      "Source: https://en.wikipedia.org/wiki/Golden_Age_of_Piracy\n",
      "\n",
      "Page content:\n",
      "\n",
      "The Golden Age of Piracy is a common designation for the period between the 1650s and the 1730s, when maritime piracy was a significant factor in the histories of the North Atlantic and Indian Oceans.\n",
      "Histories of piracy often subdivide the Golden Age of Piracy into three periods:\n",
      "\n",
      "The buccaneering period (approximately 1650 to 1680), characterized by Anglo-French seamen based in Jamaica and Tortuga attacking Spanish colonies, and shipping in the Caribbean and eastern Pacific to western Pacific.\n",
      "The Pirate Round (1690s), associated with long-distance voyages from the Americas to rob  East India Company targets in the Indian Ocean and Red Sea.\n",
      "The post-Spanish Succession period (1715 to 1726), when English sailors and privateers left unemployed by the end of the War of the Spanish Succession turned en masse to piracy in the Caribbean, the Indian Ocean, the North American eastern seaboard, and the West African coast.\n",
      "Narrower definitions of the Golden Age sometimes exclude the first or second periods, but most include at least some portion of the third. The modern conception of pirates as depicted in popular culture is derived largely, although not always accurately, from the Golden Age of Piracy.\n",
      "Factors contributing to piracy during the Golden Age included the rise in quantities of valuable cargoes being shipped to Europe over vast ocean areas, reduced European navies in certain regions, the training and experience that many sailors had gained in European navies (particularly the British Royal Navy), and corrupt and ineffective government in European overseas colonies. Colonial powers at the time constantly fought with pirates and engaged in several notable battles and other related events.\n",
      "\n",
      "\n",
      "== Name of the Golden Age ==\n",
      "\n",
      "\n",
      "=== Origin ===\n",
      "\n",
      "The oldest known literary mention of a \"Golden Age\" of piracy is from 1894, when the English journalist George Powell wrote about \"What appears to have been the golden age of piracy up to the last decade of the 17th century.\"  Powell uses the phrase while reviewing Charles Leslie's A New and Exact History of Jamaica, then over 150 years old. Powell uses the phrase only once.\n",
      "In 1897, a more systematic use of the phrase \"Golden Age of Piracy\" was introduced by historian John Fiske, who wrote, \"At no other time in the world's history has the business of piracy thriven so greatly as in the seventeenth century and the first part of the eighteenth. Its golden age may be said to have extended from about 1650 to about 1720.\"  Fiske included the activities of the Barbary corsairs and East Asian pirates in this \"Golden Age,\" noting that \"as these Mussulman pirates and those of Eastern Asia were as busily at work in the seventeenth century as at any other time, their case does not impair my statement that the age of the buccaneers was the Golden Age of piracy.\"\n",
      "Pirate historians of the first half of the 20th century occasionally adopted Fiske's term \"Golden Age,\" without necessarily following his beginning and ending dates for it.  The most expansive definition of an age of piracy was that of Patrick Pringle, who wrote in 1951 that \"the most flourishing era in the history of piracy ... began in the reign of Queen Elizabeth I and ended in the second decade of the eighteenth century.\"  This idea starkly contradicted Fiske, who had hotly denied that such Elizabethan figures as Drake were pirates.\n",
      "\n",
      "\n",
      "=== Trend toward narrow definitions ===\n",
      "Of recent definitions, that given by Pringle appears to have the widest range, an exception to an overall trend among historians from 1909 until the 1990s, toward narrowing the Golden Age.  As early as 1924, Philip Gosse described piracy as being at its height \"from 1680 until 1730.\" In his highly popular 1978 book The Pirates for TimeLife's The Seafarers series, Douglas Botting defined the Golden Age as lasting \"barely 30 years, starting at the close of the 17th Century and ending in the first quarter of the 18th.\"  Botting's definition was closely followed by Frank Sher\n",
      "\n"
     ]
    }
   ],
   "source": [
    "from langchain_community.retrievers import WikipediaRetriever\n",
    "\n",
    "retriever = WikipediaRetriever(load_max_docs=10)\n",
    "\n",
    "docs = retriever.get_relevant_documents(query=\"What defines the golden age of piracy in the Caribbean?\")\n",
    "\n",
    "metadata_title = docs[0].metadata['title']\n",
    "metadata_summary = docs[0].metadata['summary']\n",
    "metadata_source = docs[0].metadata['source']\n",
    "page_content = docs[0].page_content \n",
    "print(f\"First document returned:\\n\")\n",
    "print(f\"Title: {metadata_title}\\n\")\n",
    "print(f\"Summary: {metadata_summary}\\n\")\n",
    "print(f\"Source: {metadata_source}\\n\")\n",
    "print(f\"Page content:\\n\\n{page_content}\\n\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "id": "4ecaa060-2df4-4ea7-b286-3795ec92c098",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Use kNN for dense retriever\n",
    "from langchain_community.retrievers import KNNRetriever\n",
    "\n",
    "dense_retriever = KNNRetriever.from_texts(splits, embedding_function, k=10)\n",
    "# Re-initialize the ensemble retriever\n",
    "ensemble_retriever = EnsembleRetriever(retrievers=[dense_retriever, sparse_retriever], weights=[0.5, 0.5], c=0, k=10)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "id": "6ce8df01-925b-45b5-8fb8-17b5c40c581f",
   "metadata": {
    "id": "6ce8df01-925b-45b5-8fb8-17b5c40c581f"
   },
   "outputs": [],
   "source": [
    "#### RETRIEVAL and GENERATION ####"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "id": "fac053d8-b871-4b50-b04e-28dec9fb3b0f",
   "metadata": {
    "executionInfo": {
     "elapsed": 355,
     "status": "ok",
     "timestamp": 1715455766356,
     "user": {
      "displayName": "",
      "userId": ""
     },
     "user_tz": 240
    },
    "id": "fac053d8-b871-4b50-b04e-28dec9fb3b0f"
   },
   "outputs": [],
   "source": [
    "# Prompt\n",
    "prompt = hub.pull(\"jclemens24/rag-prompt\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "id": "5ef30632-13dd-4a34-af33-cb8fab94f169",
   "metadata": {
    "executionInfo": {
     "elapsed": 3,
     "status": "ok",
     "timestamp": 1715455766356,
     "user": {
      "displayName": "",
      "userId": ""
     },
     "user_tz": 240
    },
    "id": "5ef30632-13dd-4a34-af33-cb8fab94f169"
   },
   "outputs": [],
   "source": [
    "# Relevance check prompt\n",
    "relevance_prompt_template = PromptTemplate.from_template(\n",
    "    \"\"\"\n",
    "    Given the following question and retrieved context, determine if the context is relevant to the question.\n",
    "    Provide a score from 1 to 5, where 1 is not at all relevant and 5 is highly relevant.\n",
    "    Return ONLY the numeric score, without any additional text or explanation.\n",
    "\n",
    "    Question: {question}\n",
    "    Retrieved Context: {retrieved_context}\n",
    "\n",
    "    Relevance Score:\"\"\"\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "id": "e8975479-b3e3-481d-ad7b-08b4eb3faaef",
   "metadata": {
    "executionInfo": {
     "elapsed": 2,
     "status": "ok",
     "timestamp": 1715455766356,
     "user": {
      "displayName": "",
      "userId": ""
     },
     "user_tz": 240
    },
    "id": "e8975479-b3e3-481d-ad7b-08b4eb3faaef"
   },
   "outputs": [],
   "source": [
    "# Post-processing\n",
    "def format_docs(docs):\n",
    "    return \"\\n\\n\".join(doc.page_content for doc in docs)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 40,
   "id": "fd9db713-f705-4b65-800e-2c4e3d0e4ef4",
   "metadata": {
    "executionInfo": {
     "elapsed": 6,
     "status": "ok",
     "timestamp": 1715455766512,
     "user": {
      "displayName": "",
      "userId": ""
     },
     "user_tz": 240
    },
    "id": "fd9db713-f705-4b65-800e-2c4e3d0e4ef4"
   },
   "outputs": [],
   "source": [
    "def extract_score(llm_output):\n",
    "    try:\n",
    "        score = float(llm_output.strip())\n",
    "        return score\n",
    "    except ValueError:\n",
    "        return 0\n",
    "\n",
    "def conditional_answer(x):\n",
    "    relevance_score = extract_score(x['relevance_score'])\n",
    "    if relevance_score < 4:\n",
    "        return \"I don't know.\"\n",
    "    else:\n",
    "        return x['answer']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "id": "66eebc20-8814-408d-b8b1-c52365d177cf",
   "metadata": {},
   "outputs": [],
   "source": [
    "rag_chain_from_docs = (\n",
    "    RunnablePassthrough.assign(context=(lambda x: format_docs(x[\"context\"])))\n",
    "    | RunnableParallel(\n",
    "        {\"relevance_score\": (\n",
    "            RunnablePassthrough()\n",
    "            | (lambda x: relevance_prompt_template.format(question=x['question'], retrieved_context=x['context']))\n",
    "            | llm\n",
    "            | str_output_parser\n",
    "        ), \"answer\": (\n",
    "            RunnablePassthrough()\n",
    "            | prompt\n",
    "            | llm\n",
    "            | str_output_parser\n",
    "        )}\n",
    "    )\n",
    "    | RunnablePassthrough().assign(final_answer=conditional_answer)\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 42,
   "id": "dc5c2ab0-9191-40f7-abf2-681f1c751429",
   "metadata": {
    "executionInfo": {
     "elapsed": 5,
     "status": "ok",
     "timestamp": 1715455766512,
     "user": {
      "displayName": "",
      "userId": ""
     },
     "user_tz": 240
    },
    "id": "dc5c2ab0-9191-40f7-abf2-681f1c751429"
   },
   "outputs": [],
   "source": [
    "rag_chain_with_source = RunnableParallel(\n",
    "    {\"context\": ensemble_retriever, \"question\": RunnablePassthrough()}\n",
    ").assign(answer=rag_chain_from_docs)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 43,
   "id": "8b30177a-f9ab-45e4-812d-33b0f97325bd",
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "executionInfo": {
     "elapsed": 3524,
     "status": "ok",
     "timestamp": 1715455770031,
     "user": {
      "displayName": "",
      "userId": ""
     },
     "user_tz": 240
    },
    "id": "8b30177a-f9ab-45e4-812d-33b0f97325bd",
    "outputId": "fb5f615f-202e-4409-f4c7-10aac3a577ee",
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Original Question: What are Google's environmental initiatives?\n",
      "\n",
      "Relevance Score: 5\n",
      "\n",
      "Final Answer:\n",
      "Google has a comprehensive environmental sustainability strategy organized around three key pillars: empowering individuals to take action, working with partners and customers, and operating its business sustainably. Here are some of Google's environmental initiatives:\n",
      "\n",
      "1. **Empowering Individuals**:\n",
      "   - **Sustainability Features in Products**: Google has introduced eco-friendly routing in Google Maps, energy efficiency features in Google Nest thermostats, and carbon emissions information in Google Flights.\n",
      "   - **Goal Achievement**: In 2022, Google reached its target of helping 1 billion people make more sustainable choices through its products.\n",
      "\n",
      "2. **Working with Partners**:\n",
      "   - **Supplier Engagement**: Google works with its suppliers to reduce their energy consumption and greenhouse gas emissions, and audits them for compliance with environmental criteria.\n",
      "   - **Collaborations**: Google is a founding member of the iMasons Climate Accord and has partnered with organizations like The Nature Conservancy and ReFED to scale environmental solutions.\n",
      "\n",
      "3. **Operating Sustainably**:\n",
      "   - **Data Centers**: Google's data centers are designed to maximize energy, water, and material efficiency.\n",
      "   - **Circular Economy**: Google aims to make its product packaging 100% plastic-free by 2025 and achieve zero waste to landfill certification at all final assembly consumer hardware manufacturing sites.\n",
      "   - **Net-zero Carbon**: Google is working on advancing carbon-free energy and has invested in renewable energy projects like wind farms.\n",
      "\n",
      "4. **Public Policy and Advocacy**:\n",
      "   - **Engagement with Coalitions**: Google partners with the World Business Council for Sustainable Development and the World Resources Institute to promote sustainability initiatives and policies.\n",
      "\n",
      "5. **Innovation and Technology**:\n",
      "   - **AI and Data Analytics**: Google leverages AI and data analytics to drive intelligent supply chains and optimize renewable energy outputs.\n",
      "   - **Environmental Insights Explorer and Google Earth Engine**: These tools provide insights into how the planet is changing and help businesses and governments track sustainability progress.\n",
      "\n",
      "6. **Campus and Community Initiatives**:\n",
      "   - **Biodiversity and Nature**: Google enhances biodiversity through its campus developments and works to protect nature in communities where it operates.\n",
      "\n",
      "Google's long-term aspiration is to help individuals, cities, and partners collectively reduce 1 gigaton of carbon equivalent emissions annually by 2030.\n",
      "\n",
      "\n",
      "Retrieved Documents:\n",
      "Document 1:\n",
      "Content:\n",
      "The opportunity we have through our products and \n",
      "platforms is reflected in our updated environmental sustainability strategy, which focuses on where we can make the most significant positive impact. Our work is organized around three key pillars: empowering individuals to take action, working together with our partners and customers, and operating our business sustainably.\n",
      "In 2022, we reached our goal to help 1 billion people \n",
      "make more sustainable choices through our products. We achieved this by offering sustainability features like eco-friendly routing in Google Maps, energy efficiency features in Google Nest thermostats, and carbon emissions information in Google Flights. Looking ahead, our aspiration is to help individuals, cities, and other partners collectively reduce 1 gigaton of their carbon equivalent emissions annually by 2030.\n",
      " 2\n",
      "\n",
      "Document 2:\n",
      "Content:\n",
      "sustainability, and we’re partnering with them to develop decarbonization roadmaps and build essential data infrastructure to accurately quantify emissions and reductions across the value chain.\n",
      "We engage with our suppliers—including hardware \n",
      "manufacturing and indirect services suppliers—to help reduce their energy consumption and GHG emissions, as stated in our Supplier Code of Conduct , which all \n",
      "suppliers are required to sign. We assess suppliers’ practices to report, manage, and reduce their emissions and incorporate this into our supplier scorecard.\n",
      "Reporting  \n",
      "environmental data\n",
      "We expect all our suppliers to report environmental data,\n",
      "\n",
      "Document 3:\n",
      "Content:\n",
      "In 2022, we audited a subset of our suppliers to verify \n",
      "compliance for the following environmental criteria: implementation of environmental management systems, environmental permits and reporting, product content restrictions, and resource efficiency, as well as management of hazardous substances, wastewater,  solid waste, and air emissions.\n",
      "Googlers chat among indoor plants at our Pier 57 office in New York City.   79\n",
      "2023 Environmental Report  Public policy and advocacy\n",
      "We know that strong public policy action is critical to \n",
      "creating prosperous, equitable, and resilient low-carbon economies around the world. \n",
      "The United Nations Framework Convention on Climate \n",
      "Change (UNFCCC)’s 2015 Paris Agreement states that humanity must “keep global temperature rise this century well below 2°C above pre-industrial levels.”\n",
      " 143 Google\n",
      "\n",
      "Document 4:\n",
      "Content:\n",
      "that enable us to ensure that those we partner with are responsible environmental stewards. Along with having suppliers evaluate their operations, we perform our own ongoing due diligence and audits to verify compliance and to understand our supply chain’s current and potential risks.\n",
      "When we find that a supplier isn’t complying, we expect\n",
      "\n",
      "Document 5:\n",
      "Content:\n",
      "iMasons Climate AccordGoogle is a founding member and part of the governing body of the iMasons Climate Accord, a coalition united on carbon reduction in digital infrastructure.\n",
      "ReFEDIn 2022, to activate industry-wide change, Google provided anchor funding to kickstart the ReFED Catalytic Grant Fund, with the goal of accelerating and scaling food waste solutions.\n",
      "The Nature Conservancy (TNC)In 2022, Google supported three of the Nature Conservancy’s watershed projects in Chile and the United States, and Google.org supported a three-phased approach to catalyze active reforestation of kelp at impactful scales. Google.org also provided a grant to TNC to develop a machine-learning-powered timber-tracing API to stop deforestation in the Amazon at scale; a team of Google engineers is working full-time for six months with TNC to develop this product as part of the Google.org Fellowship Program.\n",
      "\n",
      "Document 6:\n",
      "Content:\n",
      "2023 Environmental Report  Risk management\n",
      "Our Enterprise Risk Management (ERM) team is responsible \n",
      "for identifying, assessing, and reporting risks related to the company’s operations, financial performance, and reputation. As with financial, operational, and strategic risks, the team assesses environmental risks as part of the company’s overall risk management framework. The risks and opportunities identified through this process support public disclosures and inform Google’s environmental sustainability strategy. Our Chief Sustainability Officer and sustainability teams work to address risks by identifying opportunities to reduce the company’s environmental impacts from its operations and value chain, and through improving climate resilience. \n",
      "Climate-related \n",
      "risks\n",
      "Climate-related risks and opportunities have long time\n",
      "\n",
      "Document 7:\n",
      "Content:\n",
      "2\n",
      "After two years of condensed reporting, we’re sharing a deeper dive into our approach in one place in our 2023 Environmental Report. In 2022, we continued to make measurable progress in many key ways, such as:\n",
      "• We enhanced and launched new sustainabilityproduct features , such as eco-friendly routing in\n",
      "Maps, which is estimated to have helped preventmore than 1.2 million metric tons of carbon emissionsfrom launch through 2022—equivalent to takingapproximately 250,000 fuel-based cars off the roadfor a year.\n",
      " 3\n",
      "• We expanded the availability of Google EarthEngine —which provides access to reliable, up-to-\n",
      "date insights on how our planet is changing—toinclude businesses and governments worldwide as anenterprise-grade service through Google Cloud.• We opened our new Bay View campus , which is\n",
      "all-electric, net water-positive, restores over 17 acresof high-value nature, and incorporates the leadingprinciples of circular design.\n",
      "\n",
      "Document 8:\n",
      "Content:\n",
      "of over 140 partner organizations.\n",
      "The Google.org Impact Challenge on Climate Innovation supports breakthrough projects that use data and technology to \n",
      "accelerate climate action.\n",
      "The journey ahead\n",
      "From measuring and monitoring changes on the Earth’s surface, improving forecast and prediction models for flooding and wildfires, optimizing operations, combining disparate data sources, and designing more efficient products, we continue to leverage our expertise in technology and apply the latest advancements to help solve global challenges.\n",
      "We believe that by working together with our partners and \n",
      "customers, we can make a real difference in addressing the challenges of climate change and ecosystem degradation. LEARN MORE\n",
      "• Data Commons\n",
      "• Environmental Insights Explorer\n",
      "• Google Cloud sustainability\n",
      "• Google Earth Engine\n",
      "• Sustainability-focused accelerators   31\n",
      "2023 Environmental Report  Operating \n",
      "sustainably\n",
      "We’re showing the way forward \n",
      "through our own operationsOur ambition\n",
      "\n",
      "Document 9:\n",
      "Content:\n",
      "Sustainable \n",
      "consumption of \n",
      "public goods (e.g., \n",
      "“right to repair”)Google submitted comments to the European Commission’s public consultation regarding \n",
      "the promotion of repair and reuse of goods. We shared our views on the core principles to \n",
      "consider when introducing policy measures to promote repair and reuse horizontally, and for \n",
      "smartphones and tablets specifically.\n",
      "Body of European \n",
      "Regulators \n",
      "for Electronic \n",
      "Communications \n",
      "(BEREC)Google responded to a questionnaire  by BEREC in view of the development of key performance \n",
      "indicators to characterize the environmental impact of electronic communications, networks, \n",
      "devices, and services. We provided information about our environmental reporting practices \n",
      "and suggestions to help identify which indicators would provide relevant environmental \n",
      "information.\n",
      "Engagement with coalitions and sustainability initiatives\n",
      "RE-Source PlatformGoogle is a strategic partner and steering committee member of the RE-Source Platform, the\n",
      "\n",
      "Document 10:\n",
      "Content:\n",
      "Our approach\n",
      "Supporting partners\n",
      "Investing in breakthrough \n",
      "innovation\n",
      "Creating ecosystems for \n",
      "collaboration\n",
      "The journey ahead\n",
      "   21\n",
      "2023 Environmental Report  Our ambition\n",
      "We believe that Google has a unique \n",
      "opportunity that extends beyond reducing \n",
      "the environmental impacts of our own \n",
      "operations and value chain. By organizing \n",
      "information about our planet and making \n",
      "it actionable through technology and \n",
      "platforms, we can help partners and \n",
      "customers create even more positive impact.\n",
      "Digital technologies  play a critical role in industry \n",
      "transitions, allowing us to measure and track sustainability \n",
      "progress, optimize the use of resources, reduce \n",
      "greenhouse gas emissions, and enable a more circular \n",
      "economy. 50 Cloud computing and digital technologies \n",
      "underpin the transformation in many sectors, such as \n",
      "energy, transportation, and agriculture. Research  that \n",
      "we commissioned in 2022 found that 20%–25% of what’s \n",
      "required for the EU’s 2050 net-zero goal requires some\n",
      "\n",
      "Document 11:\n",
      "Content:\n",
      "chemistry\n",
      "• Governance and engagement - Risk management; Stakeholder engagement - Supplier \n",
      "engagement\n",
      "Engagement with external targets and initiatives related to sustainable \n",
      "supply chains • Wor king together - Our approach - Supporting partners - Cloud customers and  \n",
      "commercial partners\n",
      "• Governance and engagement - PartnershipsC12. Engagement\n",
      "Goals and targets Supplier environmental assessment-related targets• Introd uction - Targets and progress summary\n",
      "• Oper ating sustainably - Circular economy - Our approach - Working with suppliers\n",
      "Performance indicators New suppliers that were screened using environmental criteria • Governance and engagement - Risk management C12. Engagement\n",
      "Supplier renewable energy• Opera ting sustainably - Net-zero carbon - Our approach - Advancing carbon-free energy - \n",
      "CFE inv estmentsC2. Risks and opportunities\n",
      "Negative environmental impacts in the supply chain and actions taken• Oper ating sustainably - Circular economy - Supply chain\n",
      "\n",
      "Document 12:\n",
      "Content:\n",
      "World Business Council for Sustainable Development (WBCSD)Google has been a member of the WBCSD for several years and participates in a number of its initiatives. \n",
      "We’re a\n",
      "ctively involved in initiatives related to improving well-being for people and the planet, including  \n",
      "shifting diets, consumer behavior changes, and regenerative agriculture. \n",
      "World Resources Institute (WRI)Google has a 13-year long relationship with WRI for impact-focused collaboration. Some key projects include developing a near-real-time land cover dataset ( Dynamic World ), deforestation monitoring and alerts ( Global \n",
      "Forest Watch ), ending commodity-driven deforestation and accelerating restoration ( Forest Data Partnership ), \n",
      "measuring and mitigating extreme heat ( supported by Google.org ), and educating stakeholders on 24/7 CFE.   84\n",
      "2023 Environmental Report  Awards and recognition\n",
      "2022 CDP Climate Change A List  \n",
      "Alphabet has been named to CDP’s Climate Change A list,\n",
      "\n",
      "Document 13:\n",
      "Content:\n",
      "development and deployment of these materials.\n",
      "In 2022, we filed a patent for using machine \n",
      "learning technology to improve our ability to prevent emissions from refrigerant leaks.\n",
      "Data centers\n",
      "Google’s data centers are the engine of our company, powering products like Gmail, Google Cloud, Search, and YouTube for billions of people around the world. We’ve worked to make Google’s data centers some of the most efficient in the world, improving their environmental performance even as demand for our products has risen. We’ve done this by designing, building, and operating each one to maximize efficient use of energy, water, \n",
      "and ma\n",
      "terials.\n",
      "Our long-standing data center efficiency  efforts are\n",
      "\n",
      "Document 14:\n",
      "Content:\n",
      "Googlers collaborate in the Event Center at our Bay View campus.   30\n",
      "2023 Environmental Report  Google for Startups\n",
      "By investing early in technologies aimed at tackling \n",
      "sustainability challenges like climate change, we have the potential to move the needle on sustainability and positively impact our planet. We have a portfolio of sustainability-focused accelerators , which support  \n",
      "early stage innovations to grow and scale.\n",
      "Google for Startups \n",
      "Accelerator\n",
      "Google for Startups is working to identify, support, and\n",
      "\n",
      "Document 15:\n",
      "Content:\n",
      "2023 Environmental Report  market structures. If no such structure exists, then Google defines the grid \n",
      "region as the electricity-balancing authority where our data centers are \n",
      "located. Outside of the United States, the grid region most often refers to \n",
      "the geographic boundary of a country, because most grid system operators \n",
      "operate at the national level. Certain regions that span multiple countries \n",
      "are well interconnected and could be considered as one grid; however, \n",
      "our grid mix calculations already include import and export considerations \n",
      "and therefore take into account power flows from neighboring grids. In \n",
      "the future, we may update our definition as we work with grid operators to \n",
      "better understand how transmission constraints or congestion impact CFE \n",
      "measurement within and across grid regions.\n",
      "91 Contracted CFE is the hourly electricity production from clean energy \n",
      "projects whose electricity and associated environmental attributes are\n",
      "\n",
      "Document 16:\n",
      "Content:\n",
      "the United States for pre-owned products, such as used and refurbished products. The journey \n",
      "ahead\n",
      "While a single individual’s actions may seem small, when \n",
      "billions of people have the tools to make more sustainable decisions, they add up to have a meaningful impact on their communities and the entire planet. \n",
      "We’re excited by the opportunity to enable climate and \n",
      "environmental action far beyond Google’s direct impact, through information and innovation.\n",
      "LEARN MORE\n",
      " • Empowering with technology\n",
      " • Google Maps eco-friendly routing\n",
      " • Searching for sustainability with Google\n",
      " • Supporting a clean energy future with Nest Renew\n",
      " • The search for sustainability   20\n",
      "2023 Environmental Report  Working \n",
      "together\n",
      "We’re working together with our \n",
      "partners and customers to advance technology for sustainabilityOur ambition\n",
      "Our approach\n",
      "Supporting partners\n",
      "Investing in breakthrough \n",
      "innovation\n",
      "Creating ecosystems for \n",
      "collaboration\n",
      "The journey ahead\n",
      "   21\n",
      "2023 Environmental Report  Our ambition\n",
      "\n",
      "Document 17:\n",
      "Content:\n",
      "EV Suitability Assessment helps organizations monitor their fleet of vehicles and make choices that minimize environmental impact.\n",
      "Data analytics tools from Google Cloud are also helping \n",
      "airlines. Lufthansa Group partnered with Google Cloud \n",
      "and Google Research to develop a platform that facilitates better planning and management of daily flight operations.\n",
      "We’re helping organizations harness \n",
      "the power of data and AI to drive more intelligent supply chains.Renewable energy\n",
      "Wind farms are an important source of carbon-free electricity, but wind can fluctuate depending on the weather. Through Google Cloud, customers like Engie  (a global energy and renewables supplier) can optimize their wind portfolio in short-term power markets by predicting wind power output 36 hours ahead of actual generation  and making optimal hourly delivery \n",
      "commitments to the grid, a full day in advance.  \n",
      "Sustainability partner \n",
      "solutions\n",
      "Partner solutions are important to scale the impact for our\n",
      "\n",
      "Document 18:\n",
      "Content:\n",
      "135 \n",
      "Preserving nature is critical both to mitigating climate  \n",
      "change and adapting to it. We want nature and people to flourish together in the communities that Google calls  home, as well as the ecosystems where we source food  for the hundreds of cafes we operate. Our approach\n",
      "We strive to protect and enhance nature and biodiversity through our campuses and technology.\n",
      "Google has offices in nearly 60 countries around the world \n",
      "(as of year-end 2022). In these locations, we aim to protect and enhance nature and biodiversity through a four-pillar approach that starts with building biodiversity at our own office and campus developments, as well as protecting nature and making it more accessible in the surrounding communities where we operate (see Figure 24). \n",
      "Our approach further focuses on sourcing responsibly\n",
      "\n",
      "Document 19:\n",
      "Content:\n",
      "cled/ Ongoing  36% 41% 2025our consumer hardware product portfolio by 2025 renewable material (see pg. 62 )\n",
      "% plastic-free Ongoing  Make product packaging 100% plastic-free by 2025 97% 96% 2025packaging (see pg. 63 )\n",
      "Significant  Achieve UL 2799 Zero Waste to Landfill certification at all final \n",
      "Supply chain % of sites certified 9% 90% 2022 progress  assembly consumer hardware manufacturing sites by 2022\n",
      "(see pg. 65 )   9\n",
      "2023 Environmental Report  \n",
      "Emerging opportunities\n",
      "As the world becomes increasingly aware of the need for sustainability, individuals, businesses, and communities are  \n",
      "looking for new ways to reduce their environmental impact. Artificial intelligence (AI) and the power of information to help individuals and organizations reduce emissions are two emerging opportunities that Google is focusing on to help build a more sustainable future.\n",
      "AI for sustainability\n",
      "\n"
     ]
    }
   ],
   "source": [
    "# User Query\n",
    "result = rag_chain_with_source.invoke(user_query)\n",
    "retrieved_docs = result['context']\n",
    "\n",
    "print(f\"Original Question: {user_query}\\n\")\n",
    "print(f\"Relevance Score: {result['answer']['relevance_score']}\\n\")\n",
    "print(f\"Final Answer:\\n{result['answer']['final_answer']}\\n\\n\")\n",
    "print(\"Retrieved Documents:\")\n",
    "for i, doc in enumerate(retrieved_docs, start=1):\n",
    "    print(f\"Document {i}:\")\n",
    "    print(f\"Content:\\n{doc.page_content}\\n\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "cdb918b5-f04b-471c-ad1c-7526e4971cbd",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "colab": {
   "name": "CHAPTER8-2_HYBRID-ENSEMBLE.ipynb",
   "provenance": []
  },
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
